#!/usr/bin/Rscript
##########################################################################################
# Issue Ownership and Agenda Setting in the 2019 Swiss National Elections
##########################################################################################
# Description:
# Script for Figure A2
##########################################################################################
# Content
##########################################################################################
# 1) Dependencies
# 2) Startup
# 3) Load Data
# 4) Transform Data
# 5) Figure A2
##########################################################################################
# 1) Dependencies
##########################################################################################
library(dplyr)
library(tidyverse)
library(data.table)
library(readr)
library(lubridate)
library(ggplot2)
library(ggExtra)
library(ggeffects)
library(ggrepel)
library(ggforce)
library(igraph)
library(graphlayouts)
library(ggraph)
library(scales)
library(purrr)
library(magrittr)
library(cowplot)
library(sysfonts)
##########################################################################################
# 2) Startup
##########################################################################################
# Start from an empty global environment (attached packages are unaffected).
rm(list = ls())

# - set dir
# Locate this script on disk. When the script is run through Rscript, the
# command-line arguments contain a `--file=<path>` entry; otherwise fall back
# to the file currently open in the RStudio source editor.
args <- commandArgs()

scriptName <- args[startsWith(args, "--file=")]

if (length(scriptName) == 0) {
  scriptName <- rstudioapi::getSourceEditorContext()$path
} else {
  # Drop the 7-character "--file=" prefix.
  scriptName <- substr(scriptName, 8, nchar(scriptName))
}

# Directory that contains the script. dirname() also copes with a path that
# has no directory component (e.g. `Rscript figure.R`), returning "."; the
# previous regex-based arithmetic produced NA in that case, which made the
# later setwd(pathName) fail.
pathName <- dirname(scriptName)


# - set directory
# Switch to the script's directory BEFORE anything is read through a relative
# path, so that the theme file sourced below resolves against the script
# location (like the data and image paths further down) instead of against
# whatever directory R happened to be launched from.
setwd(pathName)
parent_path <- getwd()

# - load fonts used in plots
sysfonts::font_add_google("Montserrat", "Montserrat")
sysfonts::font_add_google("Roboto", "Roboto")

# - load ddl theme
# NOTE(review): presumably this file defines `ddl_theme()` and `colourList`,
# which are used further below but not defined in this script — confirm.
suppressMessages(suppressWarnings(source('../ggplot_theme_ddl.R')))

# - additional theme settings (legend at the bottom, 16pt black text, black
#   axis lines); stored in `ddl_theme_2`
ddl_theme_2 <- theme(
  legend.position = "bottom",
  legend.title = element_blank(),
  strip.background = element_blank(),
  strip.text = element_text(color = "black"),
  axis.text.x = element_text(angle = 0, hjust = 0.5, vjust = 0.0, size = 16, color = "black"),
  axis.text.y = element_text(hjust = 0, size = 16, color = "black"),
  axis.ticks.y = element_blank(),
  strip.text.x = element_text(size = 16, color = "black"),
  axis.title = element_text(size = 16, color = "black"),
  legend.text = element_text(size = 16, color = "black"),
  plot.title = element_blank(),
  plot.margin = unit(c(.5, .5, .5, .5), "cm"),
  legend.key.size = unit(1.5, "line"),
  legend.key = element_blank(),
  axis.line.x = element_line(color = "black", size = .5),
  axis.line.y = element_line(color = "black", size = .5),
  panel.spacing.y = unit(2, "lines"),
  panel.spacing.x = unit(.8, "lines")
)
##########################################################################################
# 3) Load Data
##########################################################################################
# Read the three input data sets; the paths are relative to the working
# directory set during startup.
# - press releases
press <- readr::read_rds("../Data/pressreleases_2018-2019.RDS")
# - SMD articles (labelled "Media" further below)
smd <- readr::read_rds("../Data/smd_minified_2018-2019.RDS")
# - tweets
tweets <- readr::read_rds("../Data/Tweets_minified_2018_2019_curated_sentiment_class.RDS")
##########################################################################################
# 4) Transform Data
##########################################################################################
# Keep only the election year: dates strictly after 2019-01-01 and strictly
# before 2019-10-21 (both bounds are exclusive).
.window_start <- as.Date("2019-01-01")
.window_end <- as.Date("2019-10-21")

# Press releases: additionally drop the actors listed here.
.excluded_actors <- c(
  "Travail Suisse", "Schweizer Bauernverband",
  "Schweizerischer Gewerkschaftsbund",
  "Verband Schweizerischer Elektrizitätsunternehmen",
  "Sante Suisse", "Schweizerischer Baumeisterverband",
  "Economie Suisse", "Schweizerischer Gewerbeverband",
  "Schweizerischer Bankiervereinigung"
)

press <- dplyr::filter(
  press,
  pubDateTime > .window_start,
  pubDateTime < .window_end,
  !(Akteur %in% .excluded_actors)
)

smd <- dplyr::filter(
  smd,
  pubDateTime > .window_start,
  pubDateTime < .window_end
)

# Tweets: no retweets, no accounts typed "Organisation", and only the party
# labels listed below (the string "NA" and missing values are kept as well).
.kept_party_labels <- c(
  "grÜne (basels starke alternative)", "grÜnliberale partei",
  "sozialdemokratische partei der schweiz", "alternative - die grÜnen zug",
  "schweizerische volkspartei", "fdp.die liberalen",
  "christlichsoziale volkspartei oberwallis",
  "grÜne partei der schweiz", "christlich-soziale partei",
  "christdemokratische volkspartei der schweiz",
  "christlichdemokratische volkspartei der schweiz",
  "bÜrgerlich-demokratische partei schweiz", "NA", NA
)

tweets <- tweets %>%
  dplyr::filter(
    Datum > .window_start,
    Datum < .window_end,
    Is_retweet == FALSE,
    Akteur.Typ != "Organisation",
    Party %in% .kept_party_labels
  ) %>%
  # Collapse the long party labels into short party codes; anything not
  # matched (including "NA" / missing) is passed through unchanged.
  dplyr::mutate(
    Party = dplyr::case_when(
      Party %in% c(
        "grÜne (basels starke alternative)",
        "grÜne partei der schweiz",
        "alternative - die grÜnen zug"
      ) ~ "Grüne",
      Party == "sozialdemokratische partei der schweiz" ~ "SP",
      Party == "schweizerische volkspartei" ~ "SVP",
      Party == "fdp.die liberalen" ~ "FDP",
      Party %in% c(
        "christdemokratische volkspartei der schweiz",
        "christlichdemokratische volkspartei der schweiz",
        "christlich-soziale partei",
        "christlichsoziale volkspartei oberwallis"
      ) ~ "CVP",
      Party == "grÜnliberale partei" ~ "GLP",
      Party == "bÜrgerlich-demokratische partei schweiz" ~ "BDP",
      TRUE ~ Party
    )
  )

# Show the remaining date range of each source as a sanity check.
range(press$pubDateTime)
range(smd$pubDateTime)
range(tweets$Datum)


## 4.1) Harmonise outlet names in so_txt, since there are errors in the smd
## database: several spellings / editions of one outlet are mapped onto a
## single name. Values not listed below are kept unchanged.
## NOTE(review): "rts.ch" / "RTS.ch" are mapped to "srf.ch" — confirm that
## pooling these two outlets is intended.
smd <- dplyr::mutate(
  smd,
  so_txt = dplyr::case_when(
    so_txt %in% c("20 minuten online", "20 minutes", "20 minuti") ~ "20 minuten",
    so_txt == "Newsnet / 24 heures" ~ "24 heures",
    so_txt == "Newsnet / Basler Zeitung" ~ "Basler Zeitung",
    so_txt == "Newsnet / Berner Zeitung" ~ "Berner Zeitung",
    so_txt == "Newsnet / Der Bund" ~ "Der Bund",
    so_txt == "Newsnet / Le Matin" ~ "Le Matin",
    so_txt %in% c("Newsnet / Tribune de Genève", "Tribune de Genève") ~ "Tribune de Genève",
    so_txt == "Newsnet / Tages-Anzeiger" ~ "Tages-Anzeiger",
    so_txt == "Handelszeitung online" ~ "Handelszeitung",
    so_txt %in% c("rts.ch", "RTS.ch") ~ "srf.ch",
    so_txt == "SWI swissinfo.ch" ~ "swissinfo.ch",
    so_txt == "Finanz und Wirtschaft Online" ~ "Finanz und Wirtschaft",
    so_txt %in% c("Anzeigen von Uster", "Anzegier von Uster") ~ "Anzeiger von Uster",
    so_txt == "L'Agefi" ~ "Agefi",
    so_txt %in% c("Aargauer Zeitung", "Aargauer Zeitung / MLZ") ~ "Aargauer Zeitung",
    so_txt %in% c("Migros-Magazin", "Migros Magazine") ~ "Migros-Magazin",
    so_txt %in% c("Cooperazione", "Coopzeitung", "Coopération") ~ "Coopzeitung",
    so_txt %in% c("L'Express / L'Impartial", "Arcinfo") ~ "Arcinfo",
    TRUE ~ so_txt
  )
)

## Documents can carry a comma-separated double classification. Only the text
## after the last comma is kept; everything up to and including that comma is
## removed. Per the original note, this is meant to drop the "political
## system" label (e.g. election / poll / party) in favour of the other, more
## informative topic.
.keep_last_class <- function(labels) {
  gsub(".*,", "", labels)
}

smd$selectsclass <- .keep_last_class(smd$selectsclass)
press$selectsclass <- .keep_last_class(press$selectsclass)
tweets$selectsclass <- .keep_last_class(tweets$selectsclass)

# Transform Press Data:
# Party abbreviations as they appear in the `Kürzel` column.
.parties <- c("SVP", "GPS", "CVP", "SPS", "FDP", "GLP", "BDP")

# Tag every press release with an actor type ("Party", "Gov" for admin.ch,
# "Org" otherwise) and a party label ("" for non-parties), count releases per
# day x topic x actor, and express each count as a share of that actor's
# releases on that day.
press_ana <- press %>%
  dplyr::mutate(
    Akteur_Art = ifelse(
      Kürzel %in% .parties,
      "Party",
      ifelse(Kürzel == "admin.ch", "Gov", "Org")
    ),
    Party = ifelse(Kürzel %in% .parties, Kürzel, "")
  ) %>%
  dplyr::group_by(pubDateTime, selectsclass, Akteur_Art, Party) %>%
  dplyr::summarise(n = dplyr::n()) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(Akteur_Art, Party, pubDateTime) %>%
  dplyr::mutate(freq = n / sum(n))

# Transform Twitter Data:
# Align two topic labels with the spelling used elsewhere in this script,
# derive the actor type from `Akteur.Typ`, drop actor types that map to
# "Other", count tweets per day x topic x actor type x party, and express
# each count as a share of that actor/party's tweets on that day.
tweet_ana <- tweets %>%
  as.data.frame() %>%
  dplyr::mutate(
    selectsclass = dplyr::case_when(
      selectsclass == "EU_Europe" ~ "EU_Europa",
      selectsclass == "Not_Classified" ~ "Not Classified",
      TRUE ~ selectsclass
    ),
    # Rows with a missing Akteur.Typ end up as "Other" here and are removed
    # by the filter directly below.
    Akteur_Art = dplyr::case_when(
      Akteur.Typ == "Party" ~ "Party",
      Akteur.Typ == "Person" ~ "Candidate",
      Akteur.Typ == "Media" ~ "Media",
      Akteur.Typ %in% c("Administration", "Departement") ~ "Gov",
      Akteur.Typ == "Organisation" ~ "Org",
      TRUE ~ "Other"
    )
  ) %>%
  dplyr::filter(Akteur_Art != "Other") %>%
  dplyr::group_by(Datum, selectsclass, Akteur_Art, Party) %>%
  dplyr::summarise(n = dplyr::n()) %>%
  dplyr::ungroup() %>%
  dplyr::group_by(Akteur_Art, Party, Datum) %>%
  dplyr::mutate(freq = n / sum(n))

# Relabel the literal string "NA" in the Party column as "ALL". Genuinely
# missing values stay NA, because comparing NA with "NA" yields NA.
tweet_ana$Party <- ifelse(tweet_ana$Party == "NA", "ALL", tweet_ana$Party)

# Party press releases, with the party abbreviations aligned to the labels
# used for the tweets (SPS -> SP, GPS -> Grüne).
press_ana_merge <- press_ana %>%
  dplyr::filter(Akteur_Art == "Party") %>%
  dplyr::mutate(
    Party = dplyr::case_when(
      Party == "SPS" ~ "SP",
      Party == "GPS" ~ "Grüne",
      TRUE ~ Party
    )
  )

# Everything that is not a party stays in press_ana.
press_ana <- dplyr::filter(press_ana, Akteur_Art != "Party")

# Rename columns of press_ana_merge so the date column matches the tweets
# ("Datum" instead of "pubDateTime").
names(press_ana_merge) <- c("Datum", "selectsclass", "Akteur_Art", "Party", "n", "freq")

# Pool Press Releases and Tweets by parties: join on day x topic x actor,
# treat a missing count on either side as 0, add the two counts and recompute
# the daily share per actor/party on the pooled counts.
tweet_ana_pool <- dplyr::full_join(
  tweet_ana,
  press_ana_merge,
  by = c("Datum", "selectsclass", "Akteur_Art", "Party")
)

tweet_ana_pool <- tweet_ana_pool %>%
  dplyr::mutate(
    n.x = ifelse(is.na(n.x), 0, n.x),
    n.y = ifelse(is.na(n.y), 0, n.y),
    n = n.x + n.y
  ) %>%
  dplyr::group_by(Akteur_Art, Party, Datum) %>%
  dplyr::mutate(freq = n / sum(n)) %>%
  dplyr::select(-c("n.x", "n.y", "freq.x", "freq.y"))


# Show the resulting column names.
names(tweet_ana_pool)

# Party names and one colour per party (same order).
parties <- c("SVP", "SP", "FDP", "CVP", "Grüne", "GLP", "BDP", "Rest")
parties_colors <- setNames(
  c("#4B8A3E", "#F0554D", "#3872B5", "#D6862B",
    "#84B547", "#C4C43D", "#E6C820", "#B8B8B8"),
  parties
)
# NOTE(review): `colourList` is not defined in this script — presumably it
# comes from the sourced theme file; confirm.
.fill2 <- unlist(colourList[["colour"]][["parties"]])

# Rename the date column to `pubDateTime`, expand the pooled data to every
# day between the first and last observed date for all combinations of topic,
# actor type and party (new rows get NA for n / freq), and keep only party
# rows with a non-missing party label.
tweet_ana_pool <- tweet_ana_pool %>%
  dplyr::ungroup() %>%
  dplyr::rename(pubDateTime = Datum) %>%
  dplyr::mutate(pubDateTime = as.Date(pubDateTime)) %>%
  tidyr::complete(
    pubDateTime = seq.Date(min(pubDateTime), max(pubDateTime), by = "day"),
    selectsclass,
    Akteur_Art,
    Party
  ) %>%
  dplyr::filter(!is.na(Party), Akteur_Art == "Party")

# Transform SMD Data:
# Count articles per day and topic. summarise() drops the last grouping level
# (selectsclass), so `freq` is the topic's share of that day's articles. The
# rows are labelled "Media" in both Party and Akteur_Art.
smd_ana <- smd %>%
  dplyr::mutate(pubDateTime = as.Date(pubDateTime)) %>%
  dplyr::group_by(pubDateTime, selectsclass) %>%
  dplyr::summarise(n = dplyr::n()) %>%
  dplyr::mutate(
    freq = n / sum(n),
    Party = "Media",
    Akteur_Art = "Media"
  )

# Merge SMD and Twitter Data: append the media rows to the pooled party data.
tweet_ana_pool <- rbind(tweet_ana_pool, smd_ana)

# Build the plotting data: drop rows without a party label, replace missing
# n / freq by 0, keep the four topics shown in the figure (with short English
# labels), exclude the "ALL" pseudo-party, and sum the document counts per
# day x party x topic.
df_pl <- tweet_ana_pool %>%
  dplyr::filter(!is.na(Party)) %>%
  dplyr::mutate(
    freq = ifelse(is.na(freq), 0, freq),
    n = ifelse(is.na(n), 0, n)
  ) %>%
  dplyr::filter(
    selectsclass %in% c(
      "Environment_Energy", "EU_Europa",
      "GenderIssues_Discrimination", "Immigration_Asylum"
    ),
    !(Party %in% c("ALL"))
  ) %>%
  dplyr::mutate(
    selectsclass = dplyr::case_when(
      selectsclass == "Environment_Energy" ~ "Environment",
      selectsclass == "EU_Europa" ~ "Europe",
      selectsclass == "GenderIssues_Discrimination" ~ "Gender",
      selectsclass == "Immigration_Asylum" ~ "Immigration"
    )
  ) %>%
  dplyr::group_by(Party, selectsclass) %>%
  dplyr::mutate(day = lubridate::date(as.Date(pubDateTime))) %>%
  dplyr::group_by(day, Party, selectsclass) %>%
  dplyr::summarise(n = sum(n)) %>%
  # English label for the Greens in the figure.
  dplyr::mutate(Party = ifelse(Party == "Grüne", "Greens", Party))

# Fix the facet order: media first, then the parties alphabetically.
df_pl$Party <- factor(
  df_pl$Party,
  levels = c("Media", "BDP", "CVP", "FDP", "GLP", "Greens", "SP", "SVP")
)
##########################################################################################
# 5) Figure A2
##########################################################################################
# One column of panels is built per topic; `values` holds the bar colour for
# each topic, in the same order as `topics_filter`.
topics_filter <- c("Environment", "Gender", "Europe", "Immigration")
values <- c("#009E73", "#DD2461", "#0072B2", "#999999")
# Preallocate one slot per topic instead of growing the list inside the loop.
plots <- vector("list", length(topics_filter))

# Facet order shared by the helper data frame below.
.party_levels <- c("Media", "BDP", "CVP", "FDP", "GLP", "Greens", "SP", "SVP")

# - helper data frame for limits by row in facets:
#   one invisible point per facet (drawn with alpha = 0 below) whose y value
#   forces the free y-axis of that facet to reach at least 400 for "Media"
#   and 12 for each party.
limit_df <- data.frame(
  lim_date = as.Date(rep("2019-04-07", 8)),
  Party = .party_levels,
  value = c(400, 12, 12, 12, 12, 12, 12, 12)
)

# `levels` is spelled out in full (the original relied on partial argument
# matching of `level`).
limit_df$Party <- factor(limit_df$Party, levels = .party_levels)

# - plotting
# seq_along() instead of 1:length(): the loop body is skipped cleanly if
# `topics_filter` is ever empty, whereas 1:0 would iterate over c(1, 0).
for (i in seq_along(topics_filter)) {
  # Daily counts for the current topic only.
  df_pl_t <- df_pl %>% filter(selectsclass == topics_filter[i])

  plots[[i]] <- ggplot(data = df_pl_t) +
    # Daily number of documents as bars, coloured by topic.
    geom_bar(
      stat = "identity",
      aes(x = day, y = n, group = selectsclass, color = selectsclass, fill = selectsclass),
      position = position_dodge(width = 1),
      alpha = 1
    ) +
    # Invisible points that only set the minimum y-range per facet.
    geom_point(data = limit_df, aes(x = lim_date, y = value), alpha = 0) +
    scale_y_continuous(breaks = pretty_breaks()) +
    scale_x_date(
      breaks = "2 months",
      labels = date_format("%b %Y"),
      limits = c(as.Date("2019-01-01"), as.Date("2019-10-27")),
      expand = c(0, 0)
    ) +
    # One row per actor (media + seven parties), each with its own y-axis.
    facet_wrap(~Party, ncol = 1, nrow = 8, scales = "free_y") +
    labs(subtitle = "", title = "", y = "Number of documents per day", x = "", color = "", fill = "") +
    scale_color_manual(labels = topics_filter[i], values = values[i]) +
    scale_fill_manual(labels = topics_filter[i], values = values[i]) +
    # NOTE(review): `ddl_theme()` is not defined in this script — presumably
    # it comes from the sourced theme file; confirm.
    ddl_theme(
      panel.grid.major = element_blank(),
      panel.grid.minor = element_blank()
    ) +
    theme(
      legend.position = "none",
      legend.title = element_text(size = 16, color = "black"),
      strip.background = element_blank(),
      strip.text = element_text(color = "black"),
      axis.text.x = element_text(angle = 45, hjust = 1, vjust = 1.0, size = 8, color = "black"),
      axis.text.y = element_text(hjust = .5, size = 8, color = "black"),
      strip.text.x = element_text(size = 14, color = "black"),
      axis.title = element_text(size = 12, color = "black"),
      plot.title = element_blank(),
      legend.text = element_text(size = 12, color = "black"),
      legend.key = element_blank(),
      plot.margin = unit(c(.2, .2, .2, .2), "cm"),
      legend.key.size = unit(1.5, "line"),
      axis.line.x = element_line(color = "black", size = .5),
      axis.line.y = element_line(color = "black", size = .5),
      panel.spacing.y = unit(.2, "cm"),
      panel.spacing.x = unit(.2, "cm")
    ) +
    guides(color = guide_legend(override.aes = list(size = 2)))
}


# Arrange the four topic plots side by side, each labelled with its topic.
time_pl <- cowplot::plot_grid(
  plotlist = plots,
  align = "hv",
  ncol = 4,
  label_x = c(-0.05, .22, .22, -.05),
  labels = topics_filter,
  label_size = 16,
  label_fontface = "bold"
)

# Display the assembled figure.
time_pl

# Write the figure to disk. The plot is passed explicitly so the saved file
# does not depend on whichever plot ggplot2 happens to remember as the last
# one created or printed.
# NOTE(review): the file is named "figure_A4.pdf" although this script is
# described as producing Figure A2 — confirm the intended output name.
ggsave(
  filename = "../images/figure_A4.pdf",
  plot = time_pl,
  width = 21,
  height = 28,
  device = cairo_pdf,
  units = "cm",
  dpi = 300
)
##########################################################################################